# Preamble ----------------

# Organize wind speed and turbine data into hexagons 
# In order to construct instrumental variables

library(sp) #package needed to import and manipulate raster datasets
library(rgdal) #package needed to import ascii files into R
library(raster) #package needed to manipulate raster files
library(rgeos)
library(tigris)
library(readxl)
library(doBy)
#install.packages("pscl")
library(pscl)
library(plm)
library(fixest)

# Create Hexagons from Lower 48 -----------------

# read shapefile as SpatialPolygonsDataFrame from TIGER/LINE shapefiles (tiger package)
states <- states(cb = FALSE, class = "sp") 

#class(states)   # SpatialPolygonsDataFrame object
# check coordinate system/projection
#proj4string(states) # North American Datum 83 Geographic Coordinate System

# subset to lower 48
states48 <- states[states@data$NAME!="United States Virgin Islands"
                   & states@data$NAME!="Guam"
                   & states@data$NAME!="Commonwealth of the Northern Mariana Islands"
                   & states@data$NAME!="Hawaii"
                   & states@data$NAME!="American Samoa"
                   & states@data$NAME!="Alaska"
                   & states@data$NAME!="Puerto Rico",]
#plot(states48)

# store it as SpatialPolygons
SP.states48 <- as(states48, "SpatialPolygons")

# use spsample to draw n hexagons, stored as SpatialPoints
HexPts <-spsample(SP.states48,type="hexagonal", n=250000, offset=c(0,0))
# convert to SpatialPolygons
HexPols <- HexPoints2SpatialPolygons(HexPts)

# Re-project into WGS84
#crs<-"+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"
#HexWGS84 <-spTransform(HexPols, CRS=crs)

# Create dataset of points of centers
# Assign a projection
#crs <- "+proj=longlat +datum=NAD83 +no_defs"
#proj4string(HexPols) <- CRS(crs)
#centers <- as.data.frame(gCentroid(HexPols, byid=TRUE))
#colnames(centers) <- c("lon","lat")
#write.csv(centers,"[directory]\\WindResourceLocationQueries.csv", row.names = FALSE)

# Re-project into WGS84
crs<-"+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"
HexWGS84 <-spTransform(HexPols, CRS=crs)
HexWGS84 <- as(HexWGS84,"SpatialPolygonsDataFrame")

#Now add labels to each hexagon for unique ID
text(coordinates(HexWGS84), 
     labels=sapply(slot(HexWGS84, "polygons"), 
                   function(i) slot(i, "ID")), cex=0.3)


# Wind Turbine Locations and Data ---------

# this data is each turbine, not just each plant: 
setwd("[directory location of us wind turbine database]")
uswindturbines <- read_excel("uswtdb_public_070318_pid.xlsx", sheet = "Data")
turbines48 <- subset(uswindturbines,t_state != "AK"
                     & t_state != "PR"
                     & t_state != "GU"
                     & t_state != "HI"
                     & p_tnum < 1400
                     & p_tnum > 7 # 2.5 percentile
                     & p_cap > 1 & p_year>1994)

# quantile(uswindturbines$p_tnum[uswindturbines$p_tnum<1400],probs = c(0.025))
turbcoord48 <-data.frame(cbind(turbines48$xlong, turbines48$ylat)) 
names(turbcoord48)<-c("Lon_T","Lat_T") 

# keep only relevant variables for later
turbines48<-data.frame(cbind(as.numeric(turbines48$xlong), 
                             as.numeric(turbines48$ylat),
                             turbines48$t_state, turbines48$t_county,
                             turbines48$t_fips, turbines48$p_name,
                             turbines48$p_year, turbines48$p_tnum,
                             turbines48$p_cap, turbines48$t_cap)) 
names(turbines48)<-c("Lon_T","Lat_T","t_state","County","FIPS","PName","Year",
                     "p_tnum","p_cap","t_cap") 
# convert missings to NAs
turbines48$t_cap[turbines48$t_cap==-9999] <- NA
turbines48$p_cap[turbines48$p_cap==-9999] <- NA
# could fill in average per-turbine cap, e.g., PCap/TNum if TCap = -9999
# but PCap is often missing

# Convert to SpatialPointsDataFrame
t.spoint.p <- SpatialPointsDataFrame(turbcoord48[, c("Lon_T", "Lat_T")], 
                                     data.frame(ID=seq(1:nrow(turbcoord48))), 
                                     proj4string=CRS("+proj=longlat +ellps=WGS84 +datum=WGS84"))

# Point-in-Polygon merge -------------

# Give each hexagon an ID
ids <- 1:nrow(HexWGS84)
HexWGS84$ids <- ids 

turbs.in.hex <- sp::over(t.spoint.p,HexWGS84)
turbs.in.hex$tid <- row.names(turbs.in.hex)

turb48 <- cbind(turbs.in.hex,turbines48)
turb48$t_cap <- as.numeric(turb48$t_cap)
turb48$Year <- as.numeric(turb48$Year)


# Aggregate capacities within a hexagon ---------------

setwd("C:/Users/bgilbert_a/Dropbox/EnergyImpactsAggregationBias")
windspeeds <- read.csv("windspeeds.csv", header=TRUE, sep=",")
w.ids <- 1:nrow(windspeeds)
windspeeds$w.ids <- w.ids


#   - make Hex-year panel
#   - Regress aggregate on: 
#     - 2a. wind speed, year dummy, wind speed X year interaction
#     - 2b. wind speed X annual US development time series (shift-share)
#         - or with hex dummy, year dummy, and shift-share interaction
#


## Approach: Shift-shares/Feyrer-Mansur-Sacerdote ----------------

sumfun <- function(x, ...){
  c(s=sum(x, na.rm=TRUE, ...)/1000, l=length(x))
}
# get annual trends for shift-share
# number of turbines and total capacity
annual.turbcap <- summaryBy(t_cap ~ Year, FUN=sumfun,data=turb48)
annual.turbcap$USturbcap <- cumsum(annual.turbcap$t_cap.s)
annual.turbcap$USturbnum <- cumsum(annual.turbcap$t_cap.l)
# number of plants
plants <- summaryBy(Year ~ PName,FUN=mean,data=turb48)
plants$Year <- round(plants$Year.mean)
plants <- summaryBy(PName ~ Year,FUN=length,data=plants)
plants$USplantnum <- cumsum(plants$PName.length)
annual.turbcap <- merge(annual.turbcap,plants,by.x=c("Year"),by.y=c("Year"))


# Collapse turbine number and capacity by Hex ID in EACH YEAR and then append
hexwind.panel.i <- summaryBy(t_cap ~ ids, FUN=sumfun,data=subset(turb48,Year <=2000))
hexcap.panel.i <- merge(windspeeds,hexwind.panel.i,
                      by.x=c("w.ids"),by.y=c("ids"),all.x=TRUE)
hexcap.panel.i$t_cap.s[is.na(hexcap.panel.i$t_cap.s)] <- 0
hexcap.panel.i$t_cap.l[is.na(hexcap.panel.i$t_cap.l)] <- 0
hexcap.panel.i$Year <- 2000
hexcap.panel.i$USturbcap.new <- annual.turbcap$t_cap.s[annual.turbcap$Year==2000]
hexcap.panel.i$USturbnum.new <- annual.turbcap$t_cap.l[annual.turbcap$Year==2000]
hexcap.panel.i$USplntnum.new <- annual.turbcap$PName.length[annual.turbcap$Year==2000]
hexcap.panel.i$USturbcap <- annual.turbcap$USturbcap[annual.turbcap$Year==2000]
hexcap.panel.i$USturbnum <- annual.turbcap$USturbnum[annual.turbcap$Year==2000]
hexcap.panel.i$USplntnum <- annual.turbcap$USplantnum[annual.turbcap$Year==2000]

hexcap.panel <- hexcap.panel.i

for (i in 2001:2015) {
  hexwind.panel.i <- summaryBy(t_cap ~ ids, FUN=sumfun,data=subset(turb48,Year <=i))
  hexcap.panel.i <- merge(windspeeds,hexwind.panel.i,
                          by.x=c("w.ids"),by.y=c("ids"),all.x=TRUE)
  hexcap.panel.i$t_cap.s[is.na(hexcap.panel.i$t_cap.s)] <- 0
  hexcap.panel.i$t_cap.l[is.na(hexcap.panel.i$t_cap.l)] <- 0
  hexcap.panel.i$Year <- i
  hexcap.panel.i$USturbcap.new <- annual.turbcap$t_cap.s[annual.turbcap$Year==i]
  hexcap.panel.i$USturbnum.new <- annual.turbcap$t_cap.l[annual.turbcap$Year==i]
  hexcap.panel.i$USplntnum.new <- annual.turbcap$PName.length[annual.turbcap$Year==i]
  hexcap.panel.i$USturbcap <- annual.turbcap$USturbcap[annual.turbcap$Year==i]
  hexcap.panel.i$USturbnum <- annual.turbcap$USturbnum[annual.turbcap$Year==i]
  hexcap.panel.i$USplntnum <- annual.turbcap$USplantnum[annual.turbcap$Year==i]
  
  hexcap.panel <- rbind(hexcap.panel,hexcap.panel.i)
}

hexcap.panel <- hexcap.panel[order(hexcap.panel$w.ids,hexcap.panel$Year),]
#summary(lm(t_cap.s ~ wspeed_100m_mpersec * USturbcap, data=hexcap.panel))
hexcap.panel$wspeed2 <- hexcap.panel$wspeed_100m_mpersec^2
hexcap.panel$wspeed3 <- hexcap.panel$wspeed_100m_mpersec^3



hexcap.panel$log.cap=0
hexcap.panel$log.cap[hexcap.panel$t_cap.s>0]<- log(hexcap.panel$t_cap.s[hexcap.panel$t_cap.s>0])


# quasi poisson. Could to try state-year FE, not just year FE
qps <- feglm(t_cap.s ~ wspeed_100m_mpersec * USturbcap 
             +wspeed_100m_mpersec * USturbnum
             +wspeed2 * USturbcap + wspeed2*USturbnum
             | w.ids + Year, data=hexcap.panel, 
             family=quasipoisson,fixef.rm="none")

#zq <- fitted(qps) # don't need -stored in qps object
#zp <- predict.glm(qps,type="response")
summary(qps)

# index of observations: 
case.names.fixest <- function(object, ...){
  no <- object$obsRemoved
  seq_len(object$nobs_origin)[-no]
}
obsInclude <- case.names(qps)



temp1 <- as.data.frame(cbind(obsInclude,fitted(qps)))
colnames(temp1) <- c("obsInclude","cond.exp.y")
temp2 <- data.frame(matrix(NA, nrow = length(hexcap.panel[,1]), ncol = 1))
temp2$obs <- seq.int(nrow(temp2))

cond.exp.y <- merge(temp2,temp1,by.x="obs",by.y="obsInclude",all.x=TRUE)
rm(temp1,temp2)
cond.exp.y <- subset(cond.exp.y,select = -c(2))
cond.exp.y$cond.exp.y[is.na(cond.exp.y$cond.exp.y)] <- 0

hexcap.panel <- cbind(hexcap.panel,cond.exp.y)

loc.fes <- as.data.frame(cbind(as.numeric(names(fes$w.ids)),as.numeric(fes$w.ids)))
colnames(loc.fes) <- c("w.ids","loc.fe")
hexcap.panel <- merge(hexcap.panel,loc.fes,by="w.ids",all.x=TRUE)
hexcap.panel$loc.alpha <- exp(hexcap.panel$loc.fe)
#hexcap.panel$loc.alpha[is.na(hexcap.panel$loc.fe)] <- 1

yr.fes <- as.data.frame(cbind(as.numeric(names(fes$Year)),as.numeric(fes$Year)))
colnames(yr.fes) <- c("Year","yr.fe")
hexcap.panel <- merge(hexcap.panel,yr.fes,by="Year",all.x=TRUE)
hexcap.panel$yr.alpha <- exp(hexcap.panel$yr.fe)
hexcap.panel <- hexcap.panel[order(hexcap.panel$w.ids,hexcap.panel$Year),]

hexcap.panel$lambhat <- exp(qps$coefficients[1]*
                           hexcap.panel$wspeed_100m_mpersec*hexcap.panel$USturbcap
                         +qps$coefficients[2]*hexcap.panel$wspeed_100m_mpersec*hexcap.panel$USturbnum
                         +qps$coefficients[3]*
                           hexcap.panel$wspeed2*hexcap.panel$USturbcap
                         +qps$coefficients[4]*
                           hexcap.panel$wspeed2*hexcap.panel$USturbnum)
hexcap.panel$pred.y = hexcap.panel$lambhat*hexcap.panel$loc.alpha*hexcap.panel$yr.alpha
cor(hexcap.panel$pred.y,hexcap.panel$cond.exp.y)


save(HexWGS84,turb48,file="HexagonsData.RData")





